library(R.utils)
library(dplyr)
library(stringr)
library(Seurat)
library(purrr)
library(scCATCH)
library(ggplot2)
library(SingleR)
library(msigdbr)
library(tibble)
library(clusterProfiler)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(ggpubr)
library(openxlsx)
theme_set(theme_bw())
This project uses the data from Grosselin et al, 2019, which is uploaded to GEO.
mkdir -p data
cd data
wget -r -np -nd 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE117nnn/GSE117309/suppl/GSE117309_RAW.tar' -R "index.html*" # Omitting the index file
# Untar the main file
untar("data/GSE117309_RAW.tar", exdir="data/")
# Untar sc-RNAseq files
RNA_files <- list.files(path="data/",pattern="scRNA")
untar_RNA_files <- function(file){untar(paste0("data/",file), exdir="data/RNA_files")}
sapply(RNA_files, untar_RNA_files)
# Create function to load the files
load_scRNA_files <- function(model, organism){
directory <- paste0("data/RNA_files/filtered_gene_bc_matrices_",model,"/",organism,"/")
expression_matrix <- ReadMtx(
mtx=paste0(directory, "matrix.mtx"),
features=paste0(directory, "genes.tsv"),
cells=paste0(directory, "barcodes.tsv"))
# Correct gene names for easier automatization
expression_matrix@Dimnames[[1]] <- str_remove_all(expression_matrix@Dimnames[[1]],
pattern=paste0(organism,"_"))
seurat_object <- CreateSeuratObject(counts = expression_matrix, project=model)
return(seurat_object)
}
models <- c("HBCx-95","HBCx-95_CAPAR", "HBCx-22","HBCx22-TAMR")
scRNA_files_hg19 <- mapply(FUN=load_scRNA_files, models, organism="hg19")
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
names(scRNA_files_hg19) <- paste0(names(scRNA_files_hg19), "_hg19")
scRNA_files_mm10 <- mapply(FUN=load_scRNA_files, models, organism="mm10")
names(scRNA_files_mm10) <- paste0(names(scRNA_files_mm10), "_mm10")
# Normalize and identify variable features
scRNA_files_hg19 <- lapply(X = scRNA_files_hg19, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})
# Select features repeatedly variable between the 4 datasets
features <- SelectIntegrationFeatures(object.list = scRNA_files_hg19)
# Select anchors for integration
brca_anchors <- FindIntegrationAnchors(object.list = scRNA_files_hg19, anchor.features = features)
## Warning in CheckDuplicateCellNames(object.list = object.list): Some cell names
## are duplicated across objects provided. Renaming to enforce unique cell names.
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2276 anchors
## Filtering anchors
## Retained 1594 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2558 anchors
## Filtering anchors
## Retained 1046 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 3666 anchors
## Filtering anchors
## Retained 1570 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2201 anchors
## Filtering anchors
## Retained 1215 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2766 anchors
## Filtering anchors
## Retained 1635 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 3104 anchors
## Filtering anchors
## Retained 2412 anchors
# Create the integrated data assay
scRNA_combined <- IntegrateData(anchorset=brca_anchors)
## Merging dataset 4 into 3
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 1 into 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 2 1 into 3 4
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
# Use the integrated assay as default
DefaultAssay(scRNA_combined) <- "RNA"
# Creating a new slot with the percentage of counts on mitochondrial reads
scRNA_combined[["percent.mt"]] <- PercentageFeatureSet(scRNA_combined, pattern="MT-")
# Violin plots of specific features
VlnPlot(scRNA_combined,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3,
split.by="orig.ident")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
plot_qc_hg38_1 <- VlnPlot(scRNA_combined,
features = c("nFeature_RNA"),
split.by="orig.ident") +
geom_hline(yintercept=c(200,6000), linetype="dashed", color = "firebrick4") + ylim(0,7000)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
plot_qc_hg38_2 <- VlnPlot(scRNA_combined,
features = c("percent.mt"),
split.by="orig.ident") +
geom_hline(yintercept=10, linetype="dashed", color = "firebrick4") + ylim(0,50)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
We will remove cells with a percentage of mitochondrial reads too high (>5%) and with an abnormally high or low number of feature counts, that can suggest empty droplets or multiplets in the microfluidic system, respectively.
scRNA_combined <- subset(scRNA_combined, subset = nFeature_RNA > 200 & nFeature_RNA < 6000 & percent.mt < 10)
VlnPlot(scRNA_combined,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3,
split.by="orig.ident")
First we will explore the UMAPs without integration
# First let's do the same analysis without integration
scRNA_combined <- FindVariableFeatures(scRNA_combined)
scRNA_combined <- ScaleData(scRNA_combined, verbose = FALSE)
scRNA_combined <- RunPCA(scRNA_combined, npcs = 30, verbose = FALSE)
scRNA_combined <- RunUMAP(scRNA_combined, reduction = "pca", dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 17:14:52 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:14:52 Read 3006 rows and found 30 numeric columns
## 17:14:52 Using Annoy for neighbor search, n_neighbors = 30
## 17:14:52 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:14:53 Writing NN index file to temp file C:\Users\alba_\AppData\Local\Temp\Rtmps7vENO\file42f0639647fe
## 17:14:53 Searching Annoy index using 1 thread, search_k = 3000
## 17:14:54 Annoy recall = 100%
## 17:14:54 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:14:55 Found 2 connected components, falling back to 'spca' initialization with init_sdev = 1
## 17:14:55 Using 'irlba' for PCA
## 17:14:55 PCA: 2 components explained 46.18% variance
## 17:14:55 Scaling init to sdev = 1
## 17:14:55 Commencing optimization for 500 epochs, with 120564 positive edges
## 17:15:05 Optimization finished
scRNA_combined <- FindNeighbors(scRNA_combined, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
scRNA_combined <- FindClusters(scRNA_combined, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3006
## Number of edges: 115260
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8814
## Number of communities: 7
## Elapsed time: 0 seconds
p1_hg38 <- DimPlot(scRNA_combined, reduction = "umap", group.by = "orig.ident") + ggtitle("hg19 pre-integration")
p2 <- DimPlot(scRNA_combined, reduction = "umap", label = TRUE, repel = TRUE)
p1_hg38 + p2
# Change the default to the integrated dataset
DefaultAssay(scRNA_combined) <- "integrated"
scRNA_combined <- ScaleData(scRNA_combined, verbose = FALSE)
scRNA_combined <- RunPCA(scRNA_combined, npcs = 30, verbose = FALSE)
scRNA_combined <- RunUMAP(scRNA_combined, reduction = "pca", dims = 1:30)
## 17:15:10 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:15:10 Read 3006 rows and found 30 numeric columns
## 17:15:10 Using Annoy for neighbor search, n_neighbors = 30
## 17:15:10 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:15:10 Writing NN index file to temp file C:\Users\alba_\AppData\Local\Temp\Rtmps7vENO\file42f04ae15cc0
## 17:15:10 Searching Annoy index using 1 thread, search_k = 3000
## 17:15:11 Annoy recall = 100%
## 17:15:11 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:15:13 Initializing from normalized Laplacian + noise (using irlba)
## 17:15:13 Commencing optimization for 500 epochs, with 129870 positive edges
## 17:15:23 Optimization finished
scRNA_combined <- FindNeighbors(scRNA_combined, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
scRNA_combined <- FindClusters(scRNA_combined, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3006
## Number of edges: 151631
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7946
## Number of communities: 5
## Elapsed time: 0 seconds
# Visualization
p2_hg38 <- DimPlot(scRNA_combined, reduction = "umap", group.by = "orig.ident") + ggtitle("hg19 post-integration")
p3 <- DimPlot(scRNA_combined, reduction = "umap", label = TRUE, repel = TRUE)
After normalization and integration, the UMAPs of all subsets look quite similar, so we can assume integration is correct and we can proceed forward. Once this has been verified, we can go back a few steps and refine our analysis
scRNA_combined <- JackStraw(scRNA_combined, num.replicate = 100)
scRNA_combined <- ScoreJackStraw(scRNA_combined, dims=1:20)
JackStrawPlot(scRNA_combined, dims=1:20)
## Warning: Removed 28000 rows containing missing values (`geom_point()`).
ElbowPlot(scRNA_combined)
Based on these two graphs, we may consider using 13-15 PCs.
scRNA_combined <- FindNeighbors(scRNA_combined, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
scRNA_combined <- FindClusters(scRNA_combined, resolution = 0.5) # Try different levels of resolution and chose the one that results in cluster with most biological sense
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3006
## Number of edges: 151631
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7946
## Number of communities: 5
## Elapsed time: 0 seconds
scRNA_combined <- RunUMAP(scRNA_combined, dims = 1:30)
## 17:17:37 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:17:37 Read 3006 rows and found 30 numeric columns
## 17:17:37 Using Annoy for neighbor search, n_neighbors = 30
## 17:17:37 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:17:37 Writing NN index file to temp file C:\Users\alba_\AppData\Local\Temp\Rtmps7vENO\file42f06347f0c
## 17:17:37 Searching Annoy index using 1 thread, search_k = 3000
## 17:17:38 Annoy recall = 100%
## 17:17:39 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:17:40 Initializing from normalized Laplacian + noise (using irlba)
## 17:17:40 Commencing optimization for 500 epochs, with 129870 positive edges
## 17:17:48 Optimization finished
DimPlot(scRNA_combined, reduction="umap")
scRNA_combined$orig.ident <- factor(x = scRNA_combined$orig.ident, levels = c("HBCx-95", "HBCx-95_CAPAR","HBCx-22","HBCx22-TAMR"))
plot_hg19_clustering <- DimPlot(scRNA_combined, reduction = "umap", split.by = "orig.ident", ncol=2) + ggtitle("Clustering in hg19 cells") + theme(plot.title = element_text(hjust=0.5))
plot_hg19_clustering
hg19_markers <- FindAllMarkers(scRNA_combined, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
hg19_markers %>%
group_by(cluster) %>%
slice_max(n = 2, order_by = avg_log2FC)
## # A tibble: 10 × 7
## # Groups: cluster [5]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 2.32e-209 2.66 0.905 0.481 4.64e-206 0 KRT16
## 2 8.55e-171 2.38 0.813 0.369 1.71e-167 0 KRT6B
## 3 9.19e- 60 0.787 0.916 0.823 1.84e- 56 1 CLEC3A
## 4 3.16e- 54 0.681 0.928 0.847 6.32e- 51 1 AGR2
## 5 5.50e-155 1.98 0.963 0.7 1.10e-151 2 HIST1H4C
## 6 8.36e-214 1.87 0.968 0.63 1.67e-210 2 KIAA0101
## 7 4.88e- 7 1.41 0.733 0.727 9.76e- 4 3 SCGB2A2
## 8 4.34e- 4 1.28 0.593 0.595 8.68e- 1 3 SCGB1D2
## 9 1.41e- 91 2.31 0.957 0.704 2.83e- 88 4 UBE2C
## 10 6.28e-156 2.22 0.996 0.677 1.26e-152 4 PTTG1
hg19_markers %>%
group_by(cluster) %>%
top_n(n = 5, wt = avg_log2FC) -> top5
plot_hg19_heatmap <- DoHeatmap(scRNA_combined, features = top5$gene) + NoLegend()
MSigDB_df <- msigdbr(species = "Homo sapiens")
fgsea_sets<- MSigDB_df %>% split(x = .$gene_symbol, f = .$gs_name)
plot_GSEA <- function(cluster_number){
genes <- hg19_markers %>% filter(cluster==as.character(cluster_number)) %>%
arrange(desc(p_val_adj)) %>%
dplyr::select(gene, avg_log2FC)
# Create dataframe with genes
ranks <- deframe(genes)
# Perform GSEA analysis
fgseaRes <- fgsea(fgsea_sets,
stats = ranks,
minSize=10,
maxSize=500,
nperm=1000000)
# Plot
ggplot(fgseaRes %>% filter(padj < 0.01) %>% head(n= 20), aes(reorder(pathway, NES), NES)) +
geom_col(aes(fill= NES < 7.5)) +
coord_flip() +
labs(x="Pathway",
y="Normalized Enrichment Score",
title=paste0("Hallmark pathways NES from GSEA for cluster",as.character(cluster_number)))
}
for (cluster in 0:5){plot_GSEA(cluster)}
plot_GO <- function(cluster_number){
genes <- hg19_markers %>% filter(cluster==as.character(cluster_number))
# Calculate enrichment
enrichment <- enrichGO(gene = genes$gene,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
enrichment <- clusterProfiler::simplify(enrichment, cutoff=0.7, by="p.adjust", select_fun=min)
GO_ggdata <- enrichment %>%
as_data_frame() %>%
arrange(Count)
GO_ggdata$Description <- factor(GO_ggdata$Description, levels = GO_ggdata$Description)
print(tail(GO_ggdata))
ggplot(GO_ggdata, aes(x = Description, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity") +
scale_colour_viridis_d(begin=0,end=1) +
coord_flip() +
ylab("Number of genes") +
xlab("GO Terms") +
theme(axis.text.y = element_text(size=10))
}
for (cluster in 0:4){print(plot_GO(cluster))}
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0050900 leukocyte … 21/197 398/18… 1.22e- 9 3.17e- 7 2.41e- 7 ITGA6… 21
## 2 GO:0001667 ameboidal-… 21/197 492/18… 4.96e- 8 5.24e- 6 3.98e- 6 KRT16… 21
## 3 GO:0052547 regulation… 22/197 459/18… 2.86e- 9 6.04e- 7 4.59e- 7 CAV1/… 22
## 4 GO:0008544 epidermis … 27/197 362/18… 1.05e-15 1.18e-12 8.96e-13 LAMB3… 27
## 5 GO:0043588 skin devel… 28/197 302/18… 9.82e-19 1.66e-15 1.26e-15 KRT16… 28
## 6 GO:0042060 wound heal… 36/197 442/18… 5.30e-22 1.79e-18 1.36e-18 ITGB6… 36
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adj…² qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0001894 tissue homeos… 5/37 275/18… 1.87e-4 9.55e-3 6.75e-3 TFF3/… 5
## 2 GO:0097191 extrinsic apo… 6/37 224/18… 4.43e-6 1.00e-3 7.09e-4 KRT18… 6
## 3 GO:2001234 negative regu… 6/37 233/18… 5.55e-6 1.00e-3 7.09e-4 IFI6/… 6
## 4 GO:0009615 response to v… 6/37 409/18… 1.30e-4 7.65e-3 5.40e-3 BST2/… 6
## 5 GO:2001233 regulation of… 7/37 374/18… 6.93e-6 1.00e-3 7.09e-4 TNFSF… 7
## 6 GO:0010038 response to m… 8/37 360/18… 3.82e-7 2.24e-4 1.58e-4 SLC40… 8
## # … with abbreviated variable names ¹​GeneRatio, ²​p.adjust
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0045787 positive r… 25/181 362/18… 1.03e-14 2.60e-12 2.27e-12 UBE2C… 25
## 2 GO:0006260 DNA replic… 28/181 286/18… 2.19e-20 1.10e-17 9.60e-18 RPA3/… 28
## 3 GO:0098813 nuclear ch… 29/181 321/18… 4.09e-20 1.71e-17 1.50e-17 UBE2C… 29
## 4 GO:0140014 mitotic nu… 31/181 325/18… 3.63e-22 3.05e-19 2.66e-19 UBE2C… 31
## 5 GO:0007059 chromosome… 35/181 382/18… 2.18e-24 5.49e-21 4.79e-21 UBE2C… 35
## 6 GO:0000280 nuclear di… 37/181 481/18… 4.27e-23 5.37e-20 4.68e-20 UBE2C… 37
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adj…² qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0019318 hexose metabo… 4/25 229/18… 2.17e-4 7.80e-3 5.35e-3 PGK1/… 4
## 2 GO:0051402 neuron apopto… 5/25 255/18… 1.83e-5 9.86e-4 6.75e-4 EGLN3… 5
## 3 GO:0070997 neuron death 5/25 368/18… 1.05e-4 4.52e-3 3.10e-3 EGLN3… 5
## 4 GO:0071456 cellular resp… 6/25 151/18… 3.67e-8 7.92e-6 5.42e-6 NDRG1… 6
## 5 GO:0001666 response to h… 8/25 296/18… 2.82e-9 1.71e-6 1.17e-6 NDRG1… 8
## 6 GO:0036293 response to d… 8/25 309/18… 3.96e-9 1.71e-6 1.17e-6 NDRG1… 8
## # … with abbreviated variable names ¹​GeneRatio, ²​p.adjust
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0140694 non-membra… 25/151 386/18… 5.84e-16 8.73e-14 7.70e-14 BIRC5… 25
## 2 GO:0044772 mitotic ce… 26/151 473/18… 7.28e-15 6.80e-13 6.00e-13 CDKN3… 26
## 3 GO:0098813 nuclear ch… 33/151 321/18… 2.64e-27 9.86e-25 8.70e-25 PTTG1… 33
## 4 GO:0140014 mitotic nu… 37/151 325/18… 2.80e-32 3.71e-29 3.27e-29 PTTG1… 37
## 5 GO:0007059 chromosome… 39/151 382/18… 3.31e-32 3.71e-29 3.27e-29 PTTG1… 39
## 6 GO:0000280 nuclear di… 42/151 481/18… 6.20e-32 4.64e-29 4.09e-29 PTTG1… 42
## # … with abbreviated variable name ¹​GeneRatio
hg19_geneinfo <- rev_gene(data = scRNA_combined[['integrated']]@data,
data_type = "data",
species="Human",
geneinfo = geneinfo)
hg19_ann_1 <- createscCATCH(data = scRNA_combined[['integrated']]@data,
cluster = as.character(Idents(scRNA_combined)))
hg19_ann_1 <- findmarkergene(object = hg19_ann_1, species = "Human", marker = cellmatch, tissue = "Breast", cancer="Breast Cancer", use_method = "2")
## There are 535 potential marker genes in CellMatch database for Human Breast Cancer on Breast.
hg19_ann_1 <- findcelltype(hg19_ann_1)
hg19_ann_1@celltype
## cluster cluster_marker
## 1 0 G0S2, FN1, IGFBP5, ITGA6, TACSTD2, LIMA1, PMEPA1, DNAJB1, CD44
## 2 3 IGFBP5
## 3 2 CENPF, BIRC5, HELLS, BLVRB
## 4 1 BST2
## 5 4 CENPF, BIRC5
## cell_type celltype_score celltype_related_marker
## 1 Cancer Stem Cell 0.98 CD44, ITGA6
## 2 Killer Cell 0.50 IGFBP5
## 3 Helper T Cell 0.61 CENPF, BIRC5, HELLS
## 4 T Cell 0.50 BST2
## 5 Helper T Cell 0.58 CENPF, BIRC5
## PMID
## 1 24099815, 28986882, 27840965, 23799994, 23053657, 23768049, 28719381, 28579829, 28554844, 28399903, 28166194, 28148288, 28036386, 28012234, 27768764, 27630305, 27458252, 27133070, 27115469, 26893363, 26170011, 25990436, 24596390, 24297508, 24171482, 24144739, 24055090, 23543868, 23112116, 23056395, 23036368, 22761812, 22464177, 21979823, 21802218, 21092249, 22459349, 27640754, 25940879
## 2 28474673
## 3 28474673
## 4 28474673
## 5 28474673
hpca.se <- HumanPrimaryCellAtlasData()
## Warning: 'HumanPrimaryCellAtlasData' is deprecated.
## Use 'celldex::HumanPrimaryCellAtlasData' instead.
## See help("Deprecated")
## snapshotDate(): 2022-10-31
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
hpca.se
## class: SummarizedExperiment
## dim: 19363 713
## metadata(0):
## assays(1): logcounts
## rownames(19363): A1BG A1BG-AS1 ... ZZEF1 ZZZ3
## rowData names(0):
## colnames(713): GSM112490 GSM112491 ... GSM92233 GSM92234
## colData names(3): label.main label.fine label.ont
hg19_ann_2 <- SingleR(test = scRNA_combined[['integrated']]@data,
ref = hpca.se,
assay.type.test=1,
labels = hpca.se$label.main, clusters = scRNA_combined@meta.data[["seurat_clusters"]])
hg19_ann_2$labels
## [1] "Epithelial_cells" "Epithelial_cells" "BM & Prog." "Epithelial_cells"
## [5] "BM & Prog."
levels(scRNA_combined)
## [1] "0" "1" "2" "3" "4"
new_clusters_hg19 <- c("Fibroblasts","T-cells","Helper-T-Cells 1","NK cells","Helper-T-Cells 2")
names(new_clusters_hg19) <- levels(scRNA_combined)
scRNA_combined <- RenameIdents(scRNA_combined, new_clusters_hg19)
DimPlot(scRNA_combined, reduction="umap", label=T, label.box =T)
# Normalize and identify variable features
scRNA_files_mm10 <- lapply(X = scRNA_files_mm10, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})
# Select features repeatedly variable between the 4 datasets
features_mm10 <- SelectIntegrationFeatures(object.list = scRNA_files_mm10)
# Select anchors for integration
brca_anchors_mm10 <- FindIntegrationAnchors(object.list = scRNA_files_mm10, anchor.features = features_mm10)
## Warning in CheckDuplicateCellNames(object.list = object.list): Some cell names
## are duplicated across objects provided. Renaming to enforce unique cell names.
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2294 anchors
## Filtering anchors
## Retained 2030 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2088 anchors
## Filtering anchors
## Retained 1826 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2833 anchors
## Filtering anchors
## Retained 2294 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2231 anchors
## Filtering anchors
## Retained 2108 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2983 anchors
## Filtering anchors
## Retained 2716 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 2769 anchors
## Filtering anchors
## Retained 2515 anchors
# Create the integrated data assay
scRNA_combined_mm10 <- IntegrateData(anchorset=brca_anchors_mm10)
## Merging dataset 1 into 4
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 2 into 4 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 3 into 4 1 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
# Use the integrated assay as default
DefaultAssay(scRNA_combined_mm10) <- "RNA"
# Creating a new slot with the percentage of counts on mitochondrial reads
scRNA_combined_mm10[["percent.mt"]] <- PercentageFeatureSet(scRNA_combined_mm10, pattern="mt-")
# Violin plots of specific features
VlnPlot(scRNA_combined_mm10,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3,
split.by="orig.ident")
plot_qc_mm10_1 <- VlnPlot(scRNA_combined_mm10,
features = c("nFeature_RNA"),
split.by="orig.ident") +
geom_hline(yintercept=c(200,6000), linetype="dashed", color = "firebrick4") + ylim(0,7000)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
plot_qc_mm10_2 <- VlnPlot(scRNA_combined_mm10,
features = c("percent.mt"),
split.by="orig.ident") +
geom_hline(yintercept=10, linetype="dashed", color = "firebrick4") + ylim(0,50)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
We will remove cells with a percentage of mitochondrial reads too high (>5%) and with an abnormally high or low number of feature counts, that can suggest empty droplets or multiplets in the microfluidic system, respectively.
scRNA_combined_mm10 <- subset(scRNA_combined_mm10, subset = nFeature_RNA > 200 & nFeature_RNA < 6000 & percent.mt < 10)
VlnPlot(scRNA_combined_mm10,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3,
split.by="orig.ident")
# First let's do the same analysis without integration
scRNA_combined_mm10 <- FindVariableFeatures(scRNA_combined_mm10)
scRNA_combined_mm10 <- ScaleData(scRNA_combined_mm10, verbose = FALSE)
scRNA_combined_mm10 <- RunPCA(scRNA_combined_mm10, npcs = 30, verbose = FALSE)
scRNA_combined_mm10 <- RunUMAP(scRNA_combined_mm10, reduction = "pca", dims = 1:30)
## 17:22:23 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:22:23 Read 4017 rows and found 30 numeric columns
## 17:22:23 Using Annoy for neighbor search, n_neighbors = 30
## 17:22:23 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:22:24 Writing NN index file to temp file C:\Users\alba_\AppData\Local\Temp\Rtmps7vENO\file42f06908d22
## 17:22:24 Searching Annoy index using 1 thread, search_k = 3000
## 17:22:25 Annoy recall = 100%
## 17:22:26 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:22:27 Initializing from normalized Laplacian + noise (using irlba)
## 17:22:27 Commencing optimization for 500 epochs, with 158874 positive edges
## 17:22:40 Optimization finished
scRNA_combined_mm10 <- FindNeighbors(scRNA_combined_mm10, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
scRNA_combined_mm10 <- FindClusters(scRNA_combined_mm10, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4017
## Number of edges: 135529
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9071
## Number of communities: 14
## Elapsed time: 0 seconds
p1_mm10 <- DimPlot(scRNA_combined_mm10, reduction = "umap", group.by = "orig.ident") + ggtitle("mm10 pre-integration")
p2 <- DimPlot(scRNA_combined_mm10, reduction = "umap", label = TRUE, repel = TRUE)
p1_mm10 + p2
DimPlot(scRNA_combined_mm10, reduction = "umap", split.by = "orig.ident")
# Change the default to the integrated dataset
DefaultAssay(scRNA_combined_mm10) <- "integrated"
scRNA_combined_mm10 <- ScaleData(scRNA_combined_mm10, verbose = FALSE)
scRNA_combined_mm10 <- RunPCA(scRNA_combined_mm10, npcs = 30, verbose = FALSE)
scRNA_combined_mm10 <- RunUMAP(scRNA_combined_mm10, reduction = "pca", dims = 1:30)
## 17:22:46 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:22:46 Read 4017 rows and found 30 numeric columns
## 17:22:46 Using Annoy for neighbor search, n_neighbors = 30
## 17:22:46 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:22:47 Writing NN index file to temp file C:\Users\alba_\AppData\Local\Temp\Rtmps7vENO\file42f05b526cf2
## 17:22:47 Searching Annoy index using 1 thread, search_k = 3000
## 17:22:48 Annoy recall = 100%
## 17:22:49 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:22:51 Initializing from normalized Laplacian + noise (using irlba)
## 17:22:51 Commencing optimization for 500 epochs, with 167402 positive edges
## 17:23:11 Optimization finished
scRNA_combined_mm10 <- FindNeighbors(scRNA_combined_mm10, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
scRNA_combined_mm10 <- FindClusters(scRNA_combined_mm10, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4017
## Number of edges: 155284
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9020
## Number of communities: 13
## Elapsed time: 0 seconds
# Visualization
p2_mm10 <- DimPlot(scRNA_combined_mm10, reduction = "umap", group.by = "orig.ident") + ggtitle("mm10 post-integration")
p3 <- DimPlot(scRNA_combined_mm10, reduction = "umap", label = TRUE, repel = TRUE)
p2_mm10 + p3
DimPlot(scRNA_combined_mm10, reduction = "umap", split.by = "orig.ident")
After normalization and integration, the UMAPs of all subsets look quite similar, so we can assume integration is correct and we can proceed forward. Once this has been verified, we can go back a few steps and refine our analysis
scRNA_combined_mm10 <- JackStraw(scRNA_combined_mm10, num.replicate = 100)
scRNA_combined_mm10 <- ScoreJackStraw(scRNA_combined_mm10, dims=1:20)
JackStrawPlot(scRNA_combined_mm10, dims=1:20)
## Warning: Removed 28000 rows containing missing values (`geom_point()`).
ElbowPlot(scRNA_combined_mm10)
Based on these two graphs, we may consider using 10 PCs.
scRNA_combined_mm10 <- FindNeighbors(scRNA_combined_mm10, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
scRNA_combined_mm10 <- FindClusters(scRNA_combined_mm10, resolution = 0.6) # Try different levels of resolution and chose the one that results in cluster with most biological sense1
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4017
## Number of edges: 155284
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8901
## Number of communities: 14
## Elapsed time: 0 seconds
scRNA_combined_mm10 <- RunUMAP(scRNA_combined_mm10, dims = 1:10)
## 17:25:29 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:25:29 Read 4017 rows and found 10 numeric columns
## 17:25:29 Using Annoy for neighbor search, n_neighbors = 30
## 17:25:29 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:25:30 Writing NN index file to temp file C:\Users\alba_\AppData\Local\Temp\Rtmps7vENO\file42f064d65142
## 17:25:30 Searching Annoy index using 1 thread, search_k = 3000
## 17:25:31 Annoy recall = 100%
## 17:25:32 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:25:34 Initializing from normalized Laplacian + noise (using irlba)
## 17:25:34 Commencing optimization for 500 epochs, with 160050 positive edges
## 17:25:46 Optimization finished
DimPlot(scRNA_combined_mm10, reduction="umap")
scRNA_combined_mm10$orig.ident <- factor(x = scRNA_combined_mm10$orig.ident, levels = c("HBCx-95", "HBCx-95_CAPAR","HBCx-22","HBCx22-TAMR"))
plot_mm10_clustering <- DimPlot(scRNA_combined_mm10, reduction = "umap", split.by = "orig.ident",ncol=2) + ggtitle("Clustering in mm10 cells") + theme(plot.title = element_text(hjust=0.5))
mm10_markers <- FindAllMarkers(scRNA_combined_mm10, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
mm10_markers %>%
group_by(cluster) %>%
slice_max(n = 2, order_by = avg_log2FC)
## # A tibble: 28 × 7
## # Groups: cluster [14]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 9.24e-198 2.83 0.84 0.692 1.85e-194 0 Gzme
## 2 1.97e-192 2.76 0.949 0.742 3.93e-189 0 Spp1
## 3 0 4.12 0.993 0.736 0 1 C1qa
## 4 0 3.83 0.994 0.657 0 1 C1qb
## 5 2.20e-154 2.76 0.901 0.72 4.39e-151 2 Sfrp4
## 6 1.77e-204 2.67 0.973 0.641 3.53e-201 2 Apod
## 7 4.81e- 97 2.93 0.909 0.617 9.61e- 94 3 Mmp3
## 8 6.67e- 11 2.66 0.682 0.654 1.33e- 7 3 Saa3
## 9 1.51e-168 3.23 0.99 0.409 3.02e-165 4 Sbsn
## 10 4.39e-142 3.21 0.946 0.569 8.78e-139 4 Dmkn
## # … with 18 more rows
mm10_markers %>%
group_by(cluster) %>%
top_n(n = 5, wt = avg_log2FC) -> top5
plot_mm10_heatmap <-DoHeatmap(scRNA_combined_mm10, features = top5$gene) + NoLegend()
MSigDB_df <- msigdbr(species = "Homo sapiens")
fgsea_sets<- MSigDB_df %>% split(x = .$gene_symbol, f = .$gs_name)
plot_GSEA <- function(cluster_number){
genes <- mm10_markers %>% filter(cluster==as.character(cluster_number)) %>%
arrange(desc(p_val_adj)) %>%
dplyr::select(gene, avg_log2FC)
# Create dataframe with genes
ranks <- deframe(genes)
# Perform GSEA analysis
fgseaRes <- fgsea(fgsea_sets,
stats = ranks,
minSize=10,
maxSize=500,
nperm=1000000)
# Plot
ggplot(fgseaRes %>% filter(padj < 0.01) %>% head(n= 20), aes(reorder(pathway, NES), NES)) +
geom_col(aes(fill= NES < 7.5)) +
coord_flip() +
labs(x="Pathway",
y="Normalized Enrichment Score",
title=paste0("Hallmark pathways NES from GSEA for cluster",as.character(cluster_number)))
}
plot_GSEA(0)
for (cluster in 0:5){plot_GSEA(cluster)}
plot_GO_mm10 <- function(cluster_number){
genes <- mm10_markers %>% filter(cluster==as.character(cluster_number))
# Calculate enrichment
enrichment <- enrichGO(gene = genes$gene,
OrgDb = org.Mm.eg.db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
enrichment <- clusterProfiler::simplify(enrichment, cutoff=0.7, by="p.adjust", select_fun=min)
GO_ggdata <- enrichment %>%
as_data_frame() %>%
arrange(Count)
GO_ggdata$Description <- factor(GO_ggdata$Description, levels = GO_ggdata$Description)
print(tail(GO_ggdata))
ggplot(GO_ggdata, aes(x = Description, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity") +
scale_colour_viridis_d(begin=0,end=1) +
coord_flip() +
ylab("Number of genes") +
xlab("GO Terms") +
theme(axis.text.y = element_text(size=10))
}
for (cluster in 0:13){print(plot_GO_mm10(cluster))}
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0060537 muscle tis… 28/303 494/28… 5.36e-13 8.08e-11 4.96e-11 Csrp2… 28
## 2 GO:0001667 ameboidal-… 32/303 471/28… 6.39e-17 2.89e-14 1.78e-14 Timp1… 32
## 3 GO:0031589 cell-subst… 36/303 371/28… 4.27e-24 3.09e-21 1.90e-21 Col8a… 36
## 4 GO:0030198 extracellu… 48/303 322/28… 8.07e-41 1.32e-37 8.10e-38 Col8a… 48
## 5 GO:0043062 extracellu… 48/303 323/28… 9.40e-41 1.32e-37 8.10e-38 Col8a… 48
## 6 GO:0045229 external e… 48/303 324/28… 1.09e-40 1.32e-37 8.10e-38 Col8a… 48
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0007159 leukocyte … 39/240 411/28… 1.37e-29 2.45e-26 1.51e-26 H2-Eb… 39
## 2 GO:0022407 regulation… 39/240 497/28… 1.74e-26 5.67e-24 3.49e-24 H2-Eb… 39
## 3 GO:0032103 positive r… 40/240 459/28… 6.65e-29 7.93e-26 4.88e-26 Ctss/… 40
## 4 GO:0002697 regulation… 40/240 464/28… 1.01e-28 9.04e-26 5.56e-26 Fcer1… 40
## 5 GO:0002449 lymphocyte… 40/240 483/28… 4.74e-28 2.92e-25 1.80e-25 C1qa/… 40
## 6 GO:0002460 adaptive i… 42/240 486/28… 3.43e-30 1.23e-26 7.56e-27 C1qa/… 42
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0001667 ameboidal-… 28/266 471/28… 6.33e-15 1.41e-12 7.99e-13 Dcn/H… 28
## 2 GO:0050900 leukocyte … 29/266 385/28… 3.82e-18 2.41e-15 1.37e-15 Tnfai… 29
## 3 GO:0042060 wound heal… 33/266 380/28… 1.92e-22 1.81e-19 1.03e-19 Serpi… 33
## 4 GO:0030198 extracellu… 38/266 322/28… 1.12e-30 1.78e-27 1.01e-27 Dpt/H… 38
## 5 GO:0043062 extracellu… 38/266 323/28… 1.26e-30 1.78e-27 1.01e-27 Dpt/H… 38
## 6 GO:0045229 external e… 38/266 324/28… 1.41e-30 1.78e-27 1.01e-27 Dpt/H… 38
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0031589 cell-subst… 30/259 371/28… 6.17e-20 2.55e-17 1.40e-17 Itgbl… 30
## 2 GO:0050678 regulation… 30/259 442/28… 8.29e-18 2.57e-15 1.40e-15 Sfrp1… 30
## 3 GO:0042060 wound heal… 32/259 380/28… 9.67e-22 4.49e-19 2.46e-19 Serpi… 32
## 4 GO:0030198 extracellu… 46/259 322/28… 1.70e-41 2.83e-38 1.55e-38 Mfap4… 46
## 5 GO:0043062 extracellu… 46/259 323/28… 1.97e-41 2.83e-38 1.55e-38 Mfap4… 46
## 6 GO:0045229 external e… 46/259 324/28… 2.28e-41 2.83e-38 1.55e-38 Mfap4… 46
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0002683 negative r… 20/209 465/28… 2.18e-10 3.91e- 8 2.44e- 8 Cd55/… 20
## 2 GO:0001667 ameboidal-… 22/209 471/28… 5.24e-12 2.82e- 9 1.76e- 9 Sema3… 22
## 3 GO:0042060 wound heal… 29/209 380/28… 3.07e-21 2.48e-18 1.54e-18 Serpi… 29
## 4 GO:0030198 extracellu… 32/209 322/28… 6.06e-27 7.93e-24 4.94e-24 Dpt/H… 32
## 5 GO:0043062 extracellu… 32/209 323/28… 6.68e-27 7.93e-24 4.94e-24 Dpt/H… 32
## 6 GO:0045229 external e… 32/209 324/28… 7.37e-27 7.93e-24 4.94e-24 Dpt/H… 32
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0010563 negative r… 34/417 453/28… 5.22e-15 6.24e-13 3.63e-13 Pecam… 34
## 2 GO:0045936 negative r… 34/417 453/28… 5.22e-15 6.24e-13 3.63e-13 Pecam… 34
## 3 GO:0040013 negative r… 38/417 360/28… 1.04e-21 3.55e-19 2.06e-19 Egfl7… 38
## 4 GO:0090132 epithelium… 46/417 330/28… 2.03e-31 1.79e-28 1.04e-28 Cdh5/… 46
## 5 GO:0045765 regulation… 47/417 308/28… 6.40e-34 1.42e-30 8.23e-31 Cdh5/… 47
## 6 GO:0001667 ameboidal-… 57/417 471/28… 2.05e-35 9.06e-32 5.27e-32 Cdh5/… 57
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0060537 muscle tis… 24/237 494/28… 3.57e-12 9.98e-10 6.72e-10 Tnnt2… 24
## 2 GO:0001667 ameboidal-… 26/237 471/28… 2.23e-14 1.07e-11 7.19e-12 Acta2… 26
## 3 GO:0042060 wound heal… 28/237 380/28… 1.22e-18 1.02e-15 6.88e-16 Serpi… 28
## 4 GO:0030198 extracellu… 31/237 322/28… 5.33e-24 7.20e-21 4.84e-21 Col15… 31
## 5 GO:0043062 extracellu… 31/237 323/28… 5.86e-24 7.20e-21 4.84e-21 Col15… 31
## 6 GO:0045229 external e… 31/237 324/28… 6.43e-24 7.20e-21 4.84e-21 Col15… 31
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0002697 regulation… 38/245 464/28… 3.75e-26 8.39e-24 5.45e-24 Il1b/… 38
## 2 GO:0030098 lymphocyte… 38/245 471/28… 6.46e-26 1.33e-23 8.63e-24 Il1b/… 38
## 3 GO:0050900 leukocyte … 39/245 385/28… 2.54e-30 1.39e-27 9.05e-28 Il1b/… 39
## 4 GO:0050863 regulation… 40/245 378/28… 7.72e-32 5.08e-29 3.30e-29 Il1b/… 40
## 5 GO:1903037 regulation… 43/245 371/28… 6.61e-36 1.09e-32 7.06e-33 Il1b/… 43
## 6 GO:0007159 leukocyte … 50/245 411/28… 7.54e-43 2.48e-39 1.61e-39 Il1b/… 50
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adj…² qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:1901342 regulation of… 12/127 312/28… 1.42e-8 5.06e-6 3.43e-6 Pkm/P… 12
## 2 GO:0042060 wound healing 12/127 380/28… 1.22e-7 2.08e-5 1.41e-5 Lox/A… 12
## 3 GO:0042692 muscle cell d… 12/127 449/28… 7.22e-7 5.68e-5 3.85e-5 Krt19… 12
## 4 GO:0002683 negative regu… 12/127 465/28… 1.04e-6 7.51e-5 5.09e-5 Mif/S… 12
## 5 GO:0010876 lipid localiz… 12/127 470/28… 1.16e-6 8.17e-5 5.54e-5 Mif/S… 12
## 6 GO:0060537 muscle tissue… 13/127 494/28… 2.90e-7 3.76e-5 2.55e-5 Ankrd… 13
## # … with abbreviated variable names ¹​GeneRatio, ²​p.adjust
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:1903037 regulation… 27/195 371/28… 4.23e-20 1.56e-17 1.04e-17 Cd160… 27
## 2 GO:0070661 leukocyte … 27/195 384/28… 1.03e-19 3.07e-17 2.05e-17 Sh2d2… 27
## 3 GO:0051251 positive r… 27/195 465/28… 1.35e-17 2.34e-15 1.56e-15 Cd160… 27
## 4 GO:0050863 regulation… 30/195 378/28… 2.43e-23 2.39e-20 1.59e-20 Rac2/… 30
## 5 GO:0007159 leukocyte … 31/195 411/28… 1.96e-23 2.39e-20 1.59e-20 Rac2/… 31
## 6 GO:0030098 lymphocyte… 35/195 471/28… 3.51e-26 1.04e-22 6.91e-23 Cd3g/… 35
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0140694 non-membra… 28/212 379/28… 5.49e-20 1.03e-17 8.20e-18 Birc5… 28
## 2 GO:0044772 mitotic ce… 31/212 451/28… 4.02e-21 8.12e-19 6.46e-19 Birc5… 31
## 3 GO:0140014 mitotic nu… 33/212 309/28… 1.38e-28 9.06e-26 7.21e-26 Birc5… 33
## 4 GO:0098813 nuclear ch… 35/212 306/28… 2.44e-31 3.21e-28 2.56e-28 Birc5… 35
## 5 GO:0007059 chromosome… 40/212 369/28… 7.52e-35 1.98e-31 1.57e-31 Ska1/… 40
## 6 GO:0000280 nuclear di… 40/212 478/28… 2.08e-30 1.82e-27 1.45e-27 Birc5… 40
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0002683 negative r… 28/208 465/28… 7.17e-18 9.76e-16 5.97e-16 Pla2g… 28
## 2 GO:0002460 adaptive i… 28/208 486/28… 2.25e-17 2.85e-15 1.74e-15 Batf/… 28
## 3 GO:0030595 leukocyte … 30/208 228/28… 4.79e-29 8.47e-26 5.18e-26 Ccl6/… 30
## 4 GO:0032103 positive r… 30/208 459/28… 4.34e-20 1.03e-17 6.27e-18 Osm/C… 30
## 5 GO:0070661 leukocyte … 31/208 384/28… 1.90e-23 9.62e-21 5.89e-21 Pla2g… 31
## 6 GO:0050900 leukocyte … 36/208 385/28… 2.16e-29 7.65e-26 4.68e-26 Ccl6/… 36
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0070661 leukocyte … 34/274 384/28… 6.00e-23 1.11e-19 7.09e-20 Tnfsf… 34
## 2 GO:0007159 leukocyte … 34/274 411/28… 5.37e-22 3.78e-19 2.42e-19 Selpl… 34
## 3 GO:0002449 lymphocyte… 34/274 483/28… 8.93e-20 1.65e-17 1.06e-17 C1qc/… 34
## 4 GO:0002460 adaptive i… 34/274 486/28… 1.08e-19 1.74e-17 1.11e-17 C1qc/… 34
## 5 GO:0032103 positive r… 35/274 459/28… 1.85e-21 9.45e-19 6.05e-19 Hmgb2… 35
## 6 GO:0002697 regulation… 36/274 464/28… 2.65e-22 2.33e-19 1.49e-19 Rac2/… 36
## # … with abbreviated variable name ¹​GeneRatio
## # A tibble: 6 × 9
## ID Description GeneR…¹ BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 GO:0070661 leukocyte … 22/133 384/28… 5.41e-18 1.15e-15 7.38e-16 Ccl5/… 22
## 2 GO:0002683 negative r… 22/133 465/28… 3.01e-16 3.85e-14 2.46e-14 Id2/L… 22
## 3 GO:0051251 positive r… 22/133 465/28… 3.01e-16 3.85e-14 2.46e-14 Ccl5/… 22
## 4 GO:0030098 lymphocyte… 22/133 471/28… 3.92e-16 4.78e-14 3.06e-14 Cd3g/… 22
## 5 GO:0007159 leukocyte … 23/133 411/28… 1.50e-18 4.78e-16 3.06e-16 Ccl5/… 23
## 6 GO:0002449 lymphocyte… 24/133 483/28… 3.82e-18 9.00e-16 5.76e-16 Nkg7/… 24
## # … with abbreviated variable name ¹​GeneRatio
MouseRNAseqData(ensembl = FALSE, cell.ont = c("all", "nonna", "none"))
## Cannot connect to ExperimentHub server, using 'localHub=TRUE' instead
## Using 'localHub=TRUE'
## If offline, please also see BiocManager vignette section on offline use
## snapshotDate(): 2023-01-13
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## class: SummarizedExperiment
## dim: 21214 358
## metadata(0):
## assays(1): logcounts
## rownames(21214): Xkr4 Rp1 ... LOC100039574 LOC100039753
## rowData names(0):
## colnames(358): ERR525589Aligned ERR525592Aligned ... SRR1044043Aligned
## SRR1044044Aligned
## colData names(3): label.main label.fine label.ont
mm10_geneinfo <- rev_gene(data = scRNA_combined_mm10[['integrated']]@data,
data_type = "data",
species="Mouse",
geneinfo = geneinfo)
mm10_ann_1 <- createscCATCH(data = scRNA_combined_mm10[['integrated']]@data,
cluster = as.character(Idents(scRNA_combined_mm10)))
mm10_ann_1 <- findmarkergene(object = mm10_ann_1, species = "Mouse", marker = cellmatch, tissue="Mammary gland", use_method = "2")
## There are 380 potential marker genes in CellMatch database for Mouse on Mammary gland.
mm10_ann_1 <- findcelltype(mm10_ann_1)
mm10_ann_1@celltype
## cluster
## 1 4
## 2 5
## 3 0
## 4 7
## 5 1
## 6 2
## 7 6
## 8 3
## 9 9
## 10 11
## 11 8
## 12 13
## 13 12
## 14 10
## cluster_marker
## 1 Igfbp5, Apod, Mt1, Clec3b, Gsn, Aldh1a3, Dcn, Plet1, Serpinb1a, Igfbp4, Efemp1, Procr, Igfbp6, Rarres2, Col1a2, Fn1, Htra3, S100a4, Ogn, Dpt, Ly6c1, Col3a1, Lum, Gadd45g, Ctsl, Mfap5, Serping1, Glul, Gpx3, Ly6a, Dpep1, Serpinf1, Ctsk, Pcolce, Loxl1, Fstl1, Phlda1, Plpp3, S100a6, Aebp1, Pdpn, C1s1, Mmp2, Lsp1, Anxa1, Anxa2
## 2 Fabp4, Ptn, Gng11, Lrg1, Col4a2, Atf3, Procr, Col4a1, Ly6c1, Id3, Rbp1, Fscn1, Crip2, Sparc, Csrp1, Tspan13, Ly6a, Cdkn1a, Tsc22d1, Kit, Itga6, Cd200, Gata2, Cnn3, Slc12a2, Igfbp7, S100a6, Serpinh1, Vim, Anxa2
## 3 Actg2, Igfbp5, Cxcl14, Gng11, Acta2, Igfbp2, Tagln, Ctsd, Myl9, Postn, Tpm2, Igfbp4, Igfbp6, Tpm1, Itm2a, Col6a3, Col1a1, Col1a2, Fn1, Col3a1, Rbp1, Gadd45g, Fscn1, Crip2, Col6a1, Mfap5, Sparc, Serping1, Csrp1, Mfge8, Col6a2, Gpx3, Bgn, Lgals1, Tsc22d1, Serpinf1, Col5a2, Ctsk, Cd200, Cnn3, Pcolce, Mfap2, Loxl1, Fstl1, Mmp14, Gadd45b, Igfbp7, S100a6, Rcn3, Serpinh1, Aebp1, Vim, Tmem176a, Mmp2, Lsp1, Anxa1, Anxa2
## 4 Ccl5, Hp, H2-Ab1, Lyz2, Ccl6, H2-Aa, Ctss, Cd14, Atf3, Cd68, H2-Eb1, AW112010, S100a4, Cd24a, Fcer1g, Cfp, Trf, Aif1, Tyrobp, Ms4a6c, Tspan13, Cdkn1a, Ctsc, Tgm2
## 5 Ccl5, H2-Ab1, Lyz2, Ccl6, H2-Aa, Ctss, C1qc, Ctsd, Cd14, Cd68, H2-Eb1, AW112010, Fcer1g, Trf, Aif1, Tyrobp, Ms4a6c, Tspan13, Cdkn1a, Ctsc, Tgm2, Ctsb, Lgmn, Grn
## 6 Cxcl14, Apod, Mt1, Lcn2, Lpl, Clec3b, Gsn, Dcn, Serpinb1a, Igfbp4, Efemp1, Procr, Igfbp6, Col4a1, Rarres2, Col6a3, Col1a1, Col1a2, Fn1, Htra3, Ogn, Dpt, Ly6c1, Col3a1, Id3, Lum, Gadd45g, Ctsl, Col6a1, Mfap5, Sparc, Serping1, Glul, Col6a2, Gpx3, Ly6a, Dpep1, Bgn, Serpinf1, Col5a2, Ctsk, Pcolce, Mfap2, Loxl1, Fstl1, Phlda1, Plpp3, S100a6, Rcn3, Serpinh1, Aebp1, Pdpn, Dbi, C1s1, Mmp2, Anxa1, Anxa2
## 7 Actg2, Gng11, Acta2, Tagln, Mylk, Myl9, Cnn1, Col4a2, Postn, Tpm2, Tpm1, Col4a1, Rarres2, Col6a3, Col1a1, Col1a2, Fn1, S100a4, Col3a1, Crip2, Sparc, Csrp1, Mfge8, Bgn, Lgals1, Col5a2, Pcolce, Fstl1, Igfbp7, Cystm1, S100a6, Rcn3, Serpinh1, Aebp1, Vim, Anxa1, Anxa2
## 8 Hp, Cxcl14, Apod, Mt1, Clec3b, Dcn, Postn, Igfbp4, Igfbp6, Itm2a, Rarres2, Col1a1, Col1a2, Fn1, Ogn, Dpt, Col3a1, Rbp1, Lum, Gadd45g, Ctsl, Col6a1, Mfap5, Sparc, Serping1, Glul, Mfge8, Col6a2, Gpx3, Ly6a, Dpep1, Bgn, Lgals1, Tsc22d1, Serpinf1, Col5a2, Ctsk, Pcolce, Mfap2, Loxl1, Fstl1, Mmp14, Phlda1, S100a6, Rcn3, Serpinh1, Aebp1, Pdpn, Dbi, Vim, C1s1, Tmem176a, Mmp2, Lsp1, Anxa1, Anxa2
## 9 Ccl5, Nkg7, AW112010, S100a4, Tspan13, Lsp1
## 10 H2-Ab1, Lyz2, Ccl6, H2-Aa, Pf4, Mt1, Ctss, C1qc, Ctsd, Cd14, Atf3, Cd68, H2-Eb1, AW112010, S100a4, Fcer1g, Cfp, Trf, Aif1, Tyrobp, Ms4a6c, Fxyd2, Cdkn1a, Ctsc, Ctsb, Cystm1, Dbi, Lgmn, Tmem176a, Grn
## 11 Actg2, Gng11, Acta2, Mt1, Tagln, Myl9, Cnn1, Postn, Tpm2, Tpm1, Col6a3, Fn1, Crip2, Csrp1, Bgn, Lgals1, Serpinf1, Col5a2, Cnn3, S100a6, Vim, Anxa1
## 12 Ccl5, H2-Ab1, Lyz2, Nkg7, H2-Aa, Ctss, C1qc, Ctsd, Cd14, Cd68, H2-Eb1, AW112010, Fcer1g, Trf, Aif1, Tyrobp, Ms4a6c, Cdkn1a, Ctsc, Tgm2, Ctsb, Lgmn, Grn
## 13 H2-Ab1, Lyz2, H2-Aa, Ctss, C1qc, Ctsd, Cd14, Cd68, H2-Eb1, AW112010, Fcer1g, Cfp, Trf, Tuba1b, Aif1, Tyrobp, Ms4a6c, Cdkn1a, Ctsc, Tgm2, Ctsb, Lgmn, Tmem176a, Grn
## 14 Actg2, Gng11, Acta2, Tagln, Myl9, Dcn, Postn, Tpm2, Tpm1, Col4a1, Col6a3, Col1a1, Col1a2, Fn1, S100a4, Col3a1, Tuba1b, Rbp1, Lum, Crip2, Col6a1, Sparc, Csrp1, Mfge8, Col6a2, Bgn, Lgals1, Serpinf1, Col5a2, Cnn3, Pcolce, Loxl1, Fstl1, Igfbp7, Phlda1, S100a6, Rcn3, Serpinh1, Vim, Anxa1, Anxa2
## cell_type celltype_score
## 1 Luminal Progenitor Cell 0.83
## 2 Luminal Progenitor Cell 0.88
## 3 Myoepithelial Cell 0.78
## 4 Luminal Progenitor Cell 0.79
## 5 Luminal Progenitor Cell 0.73
## 6 Luminal Progenitor Cell 0.81
## 7 Myoepithelial Cell 0.79
## 8 Luminal Progenitor Cell 0.82
## 9 Killer Cell, Luminal Cell 0.58, 0.58
## 10 Luminal Progenitor Cell 0.73
## 11 Myoepithelial Cell 0.77
## 12 Luminal Progenitor Cell 0.73
## 13 Luminal Progenitor Cell 0.73
## 14 Myoepithelial Cell 0.78
## celltype_related_marker
## 1 Aldh1a3, Plet1, Igfbp5, Glul, Anxa2, Phlda1
## 2 Kit, Lrg1, Slc12a2, Ptn, Tsc22d1, Anxa2
## 3 Acta2, Tagln, Actg2, Igfbp2, Cxcl14, Myl9, Tpm2, Tpm1, Csrp1, Rbp1, Mfge8, Serpinh1
## 4 Cd14, Tgm2
## 5 Cd14, Tgm2
## 6 Glul, Lcn2, Dbi, Anxa2, Phlda1
## 7 Acta2, Tagln, Actg2, Cnn1, Mylk, Myl9, Tpm2, Tpm1, Csrp1, Mfge8, Col4a1, Col4a2, Serpinh1
## 8 Mfge8, Glul, Tsc22d1, Dbi, Anxa2, Phlda1
## 9 AW112010, Ccl5, Tspan13, Nkg7
## 10 Cd14, Dbi
## 11 Acta2, Tagln, Actg2, Cnn1, Myl9, Tpm2, Tpm1, Csrp1
## 12 Cd14, Tgm2
## 13 Cd14, Tgm2
## 14 Acta2, Tagln, Actg2, Myl9, Tpm2, Tpm1, Csrp1, Rbp1, Mfge8, Col4a1, Serpinh1
## PMID
## 1 29225342, 29775597
## 2 21132007, 29225342, 29775597
## 3 29225342, 29775597
## 4 29225342, 29775597
## 5 29225342, 29775597
## 6 29775597
## 7 29225342, 29775597
## 8 29775597
## 9 29775597
## 10 29225342, 29775597
## 11 29225342, 29775597
## 12 29225342, 29775597
## 13 29225342, 29775597
## 14 29225342, 29775597
mRNAseq.se <- MouseRNAseqData()
## Cannot connect to ExperimentHub server, using 'localHub=TRUE' instead
## Using 'localHub=TRUE'
## If offline, please also see BiocManager vignette section on offline use
## snapshotDate(): 2023-01-13
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
mRNAseq.se
## class: SummarizedExperiment
## dim: 21214 358
## metadata(0):
## assays(1): logcounts
## rownames(21214): Xkr4 Rp1 ... LOC100039574 LOC100039753
## rowData names(0):
## colnames(358): ERR525589Aligned ERR525592Aligned ... SRR1044043Aligned
## SRR1044044Aligned
## colData names(3): label.main label.fine label.ont
mm10_ann_2 <- SingleR(test = scRNA_combined_mm10[['integrated']]@data,
ref = mRNAseq.se,
assay.type.test=1,
labels = mRNAseq.se$label.main,
clusters = scRNA_combined_mm10@meta.data[["seurat_clusters"]])
mm10_ann_2
## DataFrame with 14 rows and 4 columns
## scores labels delta.next pruned.labels
## <matrix> <character> <numeric> <character>
## 0 0.625067:0.3236409:0.1017131:... Fibroblasts 0.0654643 Fibroblasts
## 1 0.171448:0.0543392:0.5195881:... Macrophages 0.0850762 Macrophages
## 2 0.629893:0.3062046:0.0609768:... Fibroblasts 0.5194622 Fibroblasts
## 3 0.622290:0.3135865:0.0582042:... Fibroblasts 0.0562530 Fibroblasts
## 4 0.591171:0.2940385:0.0624225:... Fibroblasts 0.4954680 Fibroblasts
## ... ... ... ... ...
## 9 0.0653065:0.0483964:0.536450:... NK cells 0.01491557 NK cells
## 10 0.5634213:0.2909614:0.196302:... Fibroblasts 0.10017724 Fibroblasts
## 11 0.1781777:0.0493389:0.363372:... Macrophages 0.07486630 Macrophages
## 12 0.1327699:0.0769117:0.506982:... Macrophages 0.02656899 Macrophages
## 13 0.0881576:0.0415777:0.491922:... Monocytes 0.00661534 Monocytes
new_clusters_mm10 <- c("Fibroblasts 1","Macrophages 1","Fibroblasts 2","Fibroblasts 3","Fibroblasts 4","Endothelial cells","Fibroblasts 5","Monocytes 1","Fibroblasts 6"," NK cells","Fibroblasts 7","Macrophages 2","Macrophages 3","Monocytes 2")
names(new_clusters_mm10) <- levels(scRNA_combined_mm10)
scRNA_combined_mm10 <- RenameIdents(scRNA_combined_mm10, new_clusters_mm10)
DimPlot(scRNA_combined_mm10, reduction="umap", label=T, label.box=F, label.size = 3.5)
ggarrange(plot_qc_hg38_1, plot_qc_hg38_2, plot_qc_mm10_1, plot_qc_mm10_2, common.legend = T, legend="right", labels=c("A","B","C","D"))
## Warning: Removed 50 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 50 rows containing missing values (`geom_point()`).
## Warning: Removed 50 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 50 rows containing missing values (`geom_point()`).
## Warning: Removed 2 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 10 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 10 rows containing missing values (`geom_point()`).
ggarrange(p1_hg38, p2_hg38, p1_mm10, p2_mm10, common.legend = T, legend="right", labels=c("A","B","C","D"))
ggarrange(plot_hg19_clustering, plot_hg19_heatmap, legend="right", labels=c("A","B"))
ggarrange(plot_mm10_clustering, plot_mm10_heatmap, legend="right", labels=c("A","B"))
We will store the relevant objects to use them in other analyses
# First let's store cluster identity in the meta.data slot
scRNA_combined@meta.data$cluster_name <- scRNA_combined@active.ident
scRNA_combined_mm10@meta.data$cluster_name <- scRNA_combined_mm10@active.ident
# Now save the objects in a rds file
saveRDS(scRNA_combined, "data/R_objects/hg19_RNA.rds")
saveRDS(scRNA_combined_mm10, "data/R_objects/mm10_RNA.rds")
We will also save markers for cell annotation of scChIP data
write.xlsx(file="data/R_objects/markers_RNA_hg19.xlsx", hg19_markers)
write.xlsx(file="data/R_objects/markers_RNA_mm10.xlsx", mm10_markers)
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22000)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Spanish_Spain.utf8 LC_CTYPE=Spanish_Spain.utf8
## [3] LC_MONETARY=Spanish_Spain.utf8 LC_NUMERIC=C
## [5] LC_TIME=Spanish_Spain.utf8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] celldex_1.8.0 openxlsx_4.2.5.1
## [3] ggpubr_0.5.0 org.Mm.eg.db_3.16.0
## [5] org.Hs.eg.db_3.16.0 AnnotationDbi_1.60.0
## [7] clusterProfiler_4.6.0 tibble_3.1.8
## [9] msigdbr_7.5.1 SingleR_2.0.0
## [11] SummarizedExperiment_1.28.0 Biobase_2.58.0
## [13] GenomicRanges_1.50.1 GenomeInfoDb_1.34.3
## [15] IRanges_2.32.0 S4Vectors_0.36.0
## [17] BiocGenerics_0.44.0 MatrixGenerics_1.10.0
## [19] matrixStats_0.63.0 ggplot2_3.4.0
## [21] scCATCH_3.2.1 purrr_0.3.5
## [23] SeuratObject_4.1.3 Seurat_4.3.0
## [25] stringr_1.4.1 dplyr_1.0.10
## [27] R.utils_2.12.2 R.oo_1.25.0
## [29] R.methodsS3_1.8.2
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 scattermore_0.8
## [3] tidyr_1.2.1 bit64_4.0.5
## [5] knitr_1.41 irlba_2.3.5.1
## [7] DelayedArray_0.24.0 data.table_1.14.6
## [9] KEGGREST_1.38.0 RCurl_1.98-1.9
## [11] generics_0.1.3 ScaledMatrix_1.6.0
## [13] cowplot_1.1.1 RSQLite_2.2.19
## [15] shadowtext_0.1.2 RANN_2.6.1
## [17] future_1.29.0 bit_4.0.5
## [19] enrichplot_1.18.1 spatstat.data_3.0-0
## [21] httpuv_1.6.6 assertthat_0.2.1
## [23] viridis_0.6.2 xfun_0.35
## [25] hms_1.1.2 jquerylib_0.1.4
## [27] babelgene_22.9 evaluate_0.18
## [29] promises_1.2.0.1 fansi_1.0.3
## [31] progress_1.2.2 dbplyr_2.2.1
## [33] igraph_1.3.5 DBI_1.1.3
## [35] htmlwidgets_1.5.4 spatstat.geom_3.0-3
## [37] ellipsis_0.3.2 backports_1.4.1
## [39] deldir_1.0-6 sparseMatrixStats_1.10.0
## [41] vctrs_0.5.1 ROCR_1.0-11
## [43] abind_1.4-5 cachem_1.0.6
## [45] withr_2.5.0 ggforce_0.4.1
## [47] HDO.db_0.99.1 progressr_0.11.0
## [49] sctransform_0.3.5 treeio_1.22.0
## [51] prettyunits_1.1.1 goftest_1.2-3
## [53] cluster_2.1.4 DOSE_3.24.2
## [55] ExperimentHub_2.6.0 ape_5.6-2
## [57] lazyeval_0.2.2 crayon_1.5.2
## [59] spatstat.explore_3.0-5 labeling_0.4.2
## [61] pkgconfig_2.0.3 tweenr_2.0.2
## [63] vipor_0.4.5 nlme_3.1-160
## [65] rlang_1.0.6 globals_0.16.2
## [67] lifecycle_1.0.3 miniUI_0.1.1.1
## [69] downloader_0.4 filelock_1.0.2
## [71] BiocFileCache_2.6.0 rsvd_1.0.5
## [73] AnnotationHub_3.6.0 ggrastr_1.0.1
## [75] polyclip_1.10-4 lmtest_0.9-40
## [77] Matrix_1.5-1 aplot_0.1.9
## [79] carData_3.0-5 zoo_1.8-11
## [81] beeswarm_0.4.0 ggridges_0.5.4
## [83] png_0.1-7 viridisLite_0.4.1
## [85] bitops_1.0-7 gson_0.0.9
## [87] KernSmooth_2.23-20 Biostrings_2.66.0
## [89] blob_1.2.3 DelayedMatrixStats_1.20.0
## [91] qvalue_2.30.0 parallelly_1.32.1
## [93] spatstat.random_3.0-1 rstatix_0.7.1
## [95] gridGraphics_0.5-1 ggsignif_0.6.4
## [97] beachmat_2.14.0 scales_1.2.1
## [99] memoise_2.0.1 magrittr_2.0.3
## [101] plyr_1.8.8 ica_1.0-3
## [103] zlibbioc_1.44.0 compiler_4.2.2
## [105] scatterpie_0.1.8 RColorBrewer_1.1-3
## [107] fitdistrplus_1.1-8 cli_3.4.1
## [109] XVector_0.38.0 listenv_0.8.0
## [111] patchwork_1.1.2 pbapply_1.6-0
## [113] MASS_7.3-58.1 tidyselect_1.2.0
## [115] stringi_1.7.8 highr_0.9
## [117] yaml_2.3.6 GOSemSim_2.24.0
## [119] BiocSingular_1.14.0 ggrepel_0.9.2
## [121] grid_4.2.2 sass_0.4.4
## [123] fastmatch_1.1-3 tools_4.2.2
## [125] future.apply_1.10.0 parallel_4.2.2
## [127] rstudioapi_0.14 gridExtra_2.3
## [129] farver_2.1.1 Rtsne_0.16
## [131] ggraph_2.1.0 BiocManager_1.30.19
## [133] digest_0.6.30 shiny_1.7.3
## [135] Rcpp_1.0.9 car_3.1-1
## [137] broom_1.0.1 BiocVersion_3.16.0
## [139] later_1.3.0 RcppAnnoy_0.0.20
## [141] httr_1.4.4 colorspace_2.0-3
## [143] tensor_1.5 reticulate_1.26
## [145] splines_4.2.2 uwot_0.1.14
## [147] yulab.utils_0.0.5 tidytree_0.4.1
## [149] spatstat.utils_3.0-1 graphlayouts_0.8.4
## [151] sp_1.5-1 ggplotify_0.1.0
## [153] plotly_4.10.1 xtable_1.8-4
## [155] jsonlite_1.8.3 ggtree_3.6.2
## [157] tidygraph_1.2.2 ggfun_0.0.9
## [159] R6_2.5.1 pillar_1.8.1
## [161] htmltools_0.5.3 mime_0.12
## [163] glue_1.6.2 fastmap_1.1.0
## [165] BiocParallel_1.32.1 interactiveDisplayBase_1.36.0
## [167] codetools_0.2-18 fgsea_1.24.0
## [169] utf8_1.2.2 lattice_0.20-45
## [171] bslib_0.4.1 spatstat.sparse_3.0-0
## [173] curl_4.3.3 ggbeeswarm_0.6.0
## [175] leiden_0.4.3 zip_2.2.2
## [177] GO.db_3.16.0 limma_3.54.0
## [179] survival_3.4-0 rmarkdown_2.18
## [181] munsell_0.5.0 GenomeInfoDbData_1.2.9
## [183] reshape2_1.4.4 gtable_0.3.1